# SAMPLE NAME
## specify sample name
sample.name <- c("beau", "ophio_cflo", "ophio_kim")
## specify names for all ocflo samples
sampleName <- c("ophio_cflo","ophio_ophio-infected")
## color scheme for the samples
col.scheme <- c("#5A829F", "#AD212F", "black", "#5C2849")

# SCRIPT NAME
## specify the name of the script (folder) where figures will be saved
script.name <- "01_comparing_gene_exp_ophio_beau"

# eJTK OUTPUT
## Set GammaP threshold below which genes are classified as rhythmic
gamma.pval = 0.05
## Set false discovery rate for functional enrichment analyses
FDR = 5
# LOAD DATABASES (TC7)
# 1. TC6_ejtk.db
# Desc: This database contains all ejtk-output for TC6
ejtk.db <- dbConnect(RSQLite::SQLite(), paste0(path_to_repo,"/data/databases/TC6_fungal_ejtk.db"))
# which tables are in the database
src_dbi(ejtk.db)
## src:  sqlite 3.29.0 [/Users/biplabendudas/Documents/GitHub/Das_et_al_2022a/data/databases/TC6_fungal_ejtk.db]
## tbls: beau_rhythmic_genes_12h, beau_rhythmic_genes_24h, beau_zscores_08h,
##   beau_zscores_12h, beau_zscores_24h, ophio_cflo_rhythmic_genes_12h,
##   ophio_cflo_rhythmic_genes_24h, ophio_cflo_zscores_08h,
##   ophio_cflo_zscores_12h, ophio_cflo_zscores_24h, ophio_kim_DD_zscores_24h,
##   ophio_kim_LD_rhythmic_genes_24h, ophio_kim_LD_zscores_24h
#
# 2. TC6_data.db
data.db <- dbConnect(RSQLite::SQLite(), paste0(path_to_repo,"/data/databases/TC6_fungal_data.db"))
src_dbi(data.db)
## src:  sqlite 3.29.0 [/Users/biplabendudas/Documents/GitHub/Das_et_al_2022a/data/databases/TC6_fungal_data.db]
## tbls: beau_expressed_genes, beau_fpkm, beau_log2fpkm, beau_zscores,
##   ophio_cflo_expressed_genes, ophio_cflo_fpkm, ophio_cflo_log2fpkm,
##   ophio_cflo_zscores, ophio_kim_DD_expressed_genes, ophio_kim_DD_fpkm,
##   ophio_kim_DD_log2fpkm, ophio_kim_DD_zscores, ophio_kim_expressed_genes,
##   ophio_kim_fpkm, ophio_kim_log2fpkm, ophio_kim_zscores
#

Overview/Goals

1. General patterns of gene expression

# number of all genes
all.genes <- list()
for (i in 1:length(sample.name)) {
  all.genes[[i]] <- tbl(data.db, paste0(sample.name[[i]] ,"_fpkm")) %>%  
    collect()
  
  writeLines(paste("Number of genes in", sample.name[[i]], ":", nrow(all.genes[[i]])))
}
## Number of genes in beau : 10364
## Number of genes in ophio_cflo : 7455
## Number of genes in ophio_kim : 8577
# A1: genes that have NO expression (FPKM == 0 at all time points)
not.expressed <- list()
for (i in 1:length(sample.name)) {
  not.expressed[[i]] <-
    tbl(data.db, paste0(sample.name[[i]] ,"_fpkm")) %>% 
    collect() %>% 
    filter_at(vars(starts_with("Z")), all_vars(. == 0)) %>%
    pull(gene_name)
  
  # How many genes are not expressed?
  writeLines(paste("n(genes-NOT-EXPRESSED) in", sample.name[[i]], ":", length(not.expressed[[i]])))
  
}
## n(genes-NOT-EXPRESSED) in beau : 759
## n(genes-NOT-EXPRESSED) in ophio_cflo : 190
## n(genes-NOT-EXPRESSED) in ophio_kim : 111
# A2: run enrichment (make plot of enrichment found of non-expressed genes)
for (i in 1:length(sample.name)) {
  writeLines(paste("running GO enrichment for NOT-EXPRESSED genes in", sample.name[[i]]))
  # run enrichment
  not.expressed[[i]] %>% 
    
    check_enrichment(.,
                      org = sample.name[[i]],
                      what = "pfams",
                      function.dir = path_to_repo,
                      bg = 'all') %>% 
    
    # # pull gene names for a given GO term
    # separate_rows(., gene_name, sep = ", ") %>%
    # filter(GO == "GO:0009405") %>% # pathogenesis
    # # filter(GO == "GO:0090729") %>% # toxin activity
    # # filter(GO == "GO:0044419") %>% # interspecies interaction between organisms
    # # filter(GO == "GO:0020037") %>% # heme binding
    # pull()
    
    # go_enrichment_plot(clean = "no",
    #                    function.dir = path_to_repo) %>% 
    
  print()
  
}
## running GO enrichment for NOT-EXPRESSED genes in beau
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."

## # A tibble: 16 x 7
##    annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name      
##    <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>          
##  1 no_annot   no_desc     0           0.584     0.275       2854 BBA_00003, BBA…
##  2 PF05699    Dimer_Tnp_… 0           0.016     0.002         23 BBA_03709, BBA…
##  3 PF13857    Ank_5       0.00011     0.014     0.004         38 BBA_00617, BBA…
##  4 PF00078    RVT_1       0.000300    0.008     0.001         15 BBA_09633, BBA…
##  5 PF13391    HNH_2       0.000300    0.007     0.001         11 BBA_07949, BBA…
##  6 PF13606    Ank_3       0.00048     0.014     0.004         46 BBA_00617, BBA…
##  7 PF12796    Ank_2       0.00059     0.018     0.007         69 BBA_00617, BBA…
##  8 PF00023    Ank         0.00071     0.017     0.006         64 BBA_00617, BBA…
##  9 PF13637    Ank_4       0.00071     0.017     0.006         64 BBA_00617, BBA…
## 10 PF06985    HET         0.0176      0.007     0.002         23 BBA_00683, BBA…
## 11 PF11807    DUF3328     0.0254      0.007     0.002         25 BBA_00019, BBA…
## 12 PF00082    Peptidase_… 0.0304      0.008     0.003         34 BBA_00619, BBA…
## 13 PF05729    NACHT       0.0304      0.007     0.003         27 BBA_00617, BBA…
## 14 PF00067    p450        0.0304      0.014     0.007         77 BBA_02747, BBA…
## 15 PF01565    FAD_bindin… 0.0391      0.007     0.003         29 BBA_00709, BBA…
## 16 PF13847    Methyltran… 0.0391      0.008     0.004         37 BBA_09438, BBA…
## running GO enrichment for NOT-EXPRESSED genes in ophio_cflo
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."

## # A tibble: 3 x 7
##   annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name       
##   <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>           
## 1 no_annot   no_desc      0          0.63      0.421       1992 GQ602_000673, G…
## 2 PF00067    p450         0.00048    0.045     0.011         51 GQ602_001415, G…
## 3 PF01375    Enterotoxi…  0.00104    0.032     0.007         34 GQ602_002072, G…
## running GO enrichment for NOT-EXPRESSED genes in ophio_kim
## [1] "Loading annotation file for Ophiocordyceps kimflemingae"
## [1] "Done."
## [1] "There are no pfams terms to test enrichment for."
# B: genes that are expressed (FPKM > 1 for at least one time point)
expressed <- list()
for (i in 1:length(sample.name)) {
  expressed[[i]] <- 
    tbl(data.db, paste0(sample.name[[i]],"_expressed_genes")) %>% 
    filter(expressed=="yes") %>% 
    collect() %>% 
    pull(gene_name) 
  
  # How many genes are expressed?
  writeLines(paste("n(EXPRESSED) in", sample.name[[i]], ":", length(expressed[[i]])))
}
## n(EXPRESSED) in beau : 9006
## n(EXPRESSED) in ophio_cflo : 6998
## n(EXPRESSED) in ophio_kim : 8150

2. Daily rhythms in gene expression

## Load all the rhythmic genesets 
## Note, ordered according to their p-value; highly rhythmic at the top.
#
# Choose period
period = '24'

## 
rhy <- list()
for (i in 1:2) {
  rhy[[i]] <-
    tbl(ejtk.db, paste0(sample.name[[i]],"_zscores_",period,'h')) %>%
    filter(GammaP < gamma.pval) %>%
    select(ID, GammaP) %>% collect() %>% arrange(GammaP) %>%
    select(ID) %>% pull()
  
  # How many genes are rythmic?
  writeLines(paste0("n(rhythmic-",period, "h) in ", sample.name[[i]], " : ", length(rhy[[i]])))
}
## n(rhythmic-24h) in beau : 1872
## n(rhythmic-24h) in ophio_cflo : 2285

Hierarchical clustering of rhy24

  • perform hierarchical clustering of 24h-rhythmic genes into four clusters;
  • plot time-course heatmaps for the clustered 24h-rhythmic geneset
  • Identify the day-peaking and night-peaking clusters visually.
## initialise lists to hold input and output of the hierarchical clustering
zscore.dat <- list() # zscore data (input)
my_gene_col <- list() # cluster identity for each rhythmic gene (output)
rhy.heat <- list() # pheatmap that can be saved/plotted (output)

# specify number of clusters
n_clusters <- 4

## run clustering and plot
for (i in 1:2) {
  ## load zscore dataset
  zscore.dat[[i]] <- data.db %>% tbl(., paste0(sample.name[[i]],"_zscores")) %>% collect()
  
  # Filter the zscores to keep only rhythmic genes
  zscore.rhy <-
    zscore.dat[[i]] %>% 
    filter(gene_name %in% rhy[[i]]) %>% 
    as.data.frame()
  
  # Set genes as rownames and convert it into a matrix
  rownames(zscore.rhy) = zscore.rhy$gene_name
  zscore.rhy <- as.matrix(zscore.rhy[-1])
  
  
  # Hierarchical clustering of the genesets
  my_hclust_gene <- hclust(dist(zscore.rhy), method = "complete")
  
  
  # Make annotations for the heatmaps
  my_clusters <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # k=  clusters
  my_gene_col[[i]] <- data.frame(cluster = my_clusters)
  
  
  # I’ll add some column annotations and create the heatmap.
  # Annotations for:
  # 1. Is the sample collected during the light or dark phase? 
  my_sample_col <- data.frame(phase = rep(c("light", "dark", "light"), c(5,6,1)))
  row.names(my_sample_col) <- colnames(zscore.rhy)
  
  # Manual color palette
  my_colour = list(
    phase = c(light = "#F2E18D", dark = "#010440"),
    cluster = viridis::cividis(100)[c(seq(10,80,by=round(80/(n_clusters), 0)))])
  
  # Color scale
  my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.1)
  # my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
  
  # Let's plot!
  rhy.heat[[i]] <-
    pheatmap(zscore.rhy, show_rownames = F, show_colnames = F,
             annotation_row = my_gene_col[[i]], 
             annotation_col = my_sample_col,
             cutree_rows = n_clusters, # OG was 4
             # cutree_cols = 2,
             annotation_colors = my_colour,
             border_color=FALSE,
             cluster_cols = F,
             breaks = my.breaks,
             ## color scheme borrowed from: 
             color = inferno(length(my.breaks) - 1),
             treeheight_row = 0,
             # treeheight_col = 0,
             # remove the color scale or not
             main = paste0(sample.name[[i]], " 24h-rhythmic \n (n=", nrow(zscore.rhy), " genes)"),
             ## annotation legend
             annotation_legend = T,
             ## Color scale
             legend = T)
  
}

Phase plots

rhy.24.sig <- list()
phase.ejtk <- list()

# Obtain the phases of 24h-rhythmic genes beau v. ophio_cflo
for (i in 1:2) {

rhy.24.sig[[i]] <- 
  tbl(ejtk.db, paste0(sample.name[i],"_zscores_24h")) %>% 
  filter(GammaP < gamma.pval) %>% 
  collect()

# Get the phases of the best matched waveforms
phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$Phase, units="hours", template="clock24")
# # Get the time-of-day of expression peak
# phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$MaxLoc, units="hours", template="clock24")
# # Get the time-of-day of expression trough
# phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$MinLoc, units="hours", template="clock24")

}

# save all the circular phases in a list
l.phases <- phase.ejtk
# let's name the list elements for later use and reference
names(l.phases) <- sample.name[1:2]

writeLines("Performing Watson test to check if the average peak of 24h-rhythms in Beau and Ophio-cflo differs significantly")
## Performing Watson test to check if the average peak of 24h-rhythms in Beau and Ophio-cflo differs significantly
# For all rhy genes
beau.ophio <- watson.two.test(l.phases[[1]],l.phases[[2]], alpha = FDR/100)
writeLines("Beau v. Ophio-cflo")
## Beau v. Ophio-cflo
beau.ophio %>% print()
## 
##       Watson's Two-Sample Test of Homogeneity 
## 
## Test Statistic: 5.9634 
## Level 0.05 Critical Value: 0.187 
## Reject Null Hypothesis
## Plot the phase distributions

# Initialize a list for saving the ggplots
g <- list()

means <- as.numeric(lapply(phase.ejtk, mean))
means <- circular(means, units="hours", template="clock24")


for(i in 1:length(l.phases)) {
  
  # define phase levels
  ordered_phases <- c("2","4","6","8","10","12",
                      "14","16","18","20","22","24")
    
  df.test <- l.phases[[i]] %>%
    as.data.frame() %>% 
    mutate(phase = x) %>%
    mutate(phase = replace(phase, x=="0", "24")) %>% 
    select(-x) %>% 
    group_by(phase = factor(phase, levels = ordered_phases)) %>%
    summarise(n_genes = n())

  m <- as.numeric(means[i])
    
  g[[i]] <-
    ggplot(df.test, aes(x=factor(phase), y=n_genes)) + 
    # indicate light-dark phase
    geom_rect(aes(xmin = as.factor(12), xmax = as.factor(24), ymin = -Inf, ymax = Inf),
              fill = "grey80", alpha = 0.05, color=NA) +
    geom_bar(stat='identity', fill=col.scheme[[i]]) +
    
    xlab(c(names(l.phases)[i])) +
    
    scale_y_continuous(breaks = c(0,300,600), limits = c(0,700)) +
    
    coord_polar() +
    theme_Publication(base_size = 20) +
    theme(# axis.title.x=element_blank(),
              # axis.text.x=element_blank(),
              legend.position = "none")
    #ggtitle(paste0("Dataset: ", names(l.phases)[i])) 
  
}

ggpubr::ggarrange(plotlist=g,
                  nrow = 1, ncol = 2,
                  widths = c(1,1), labels = NA)

Clusters of rhythmic genes

  • Which processes are identified clusters of rhythmic genes involved in?
  • How do they fluctuate throughout the day?

Beau - 24h

for (i in 1:n_clusters){
  
  writeLines(paste0("Species: ", sample.name[[1]], "\n", "24h-rhythmic genes, Cluster: ", i))
  
  # Summary
  genes <- my_gene_col[[1]] %>% rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
  writeLines(paste0("n(genes) = ", length(genes),"\n"))
  
  # Enrichment
  overrepresented.terms <-
    genes %>% 
      check_enrichment(.,
                    function.dir = path_to_repo,
                    what = "pfams",
                    org = sample.name[[1]],
                    bg = expressed[[1]],
                    filter = T,
                    plot = F)
  writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
  print(overrepresented.terms %>% as_tibble())

  # Stacked zplot
  genes %>% 
  stacked.zplot_tc6(cond = "beau") %>% 
    multi.plot(rows = 1, cols = 1)
  
}
## Species: beau
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 767
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 28
## 
## # A tibble: 28 x 7
##    annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name      
##    <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>          
##  1 PF03810    IBN_N       0           0.009     0.001          8 BBA_00780, BBA…
##  2 PF07714    Pkinase_Tyr 0           0.036     0.012        105 BBA_00171, BBA…
##  3 PF00069    Pkinase     0           0.037     0.014        124 BBA_00171, BBA…
##  4 PF02535    Zip         0.00008     0.007     0.001          8 BBA_01230, BBA…
##  5 PF00689    Cation_ATP… 0.00012     0.008     0.001         12 BBA_00548, BBA…
##  6 PF00512    HisKA       0.000130    0.007     0.001          9 BBA_00328, BBA…
##  7 PF05729    NACHT       0.000130    0.009     0.002         17 BBA_01725, BBA…
##  8 PF00072    Response_r… 0.000130    0.008     0.001         13 BBA_00328, BBA…
##  9 PF00690    Cation_ATP… 0.000130    0.008     0.001         13 BBA_00548, BBA…
## 10 PF04082    Fungal_tra… 0.00017     0.032     0.014        125 BBA_00237, BBA…
## # … with 18 more rows

## Species: beau
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 550
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 2
## 
## # A tibble: 2 x 7
##   annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name        
##   <chr>      <chr>         <dbl>    <dbl>     <dbl>      <int> <chr>            
## 1 "PF00226"  DnaJ        0.00041    0.013     0.003         23 BBA_00135, BBA_0…
## 2 ""         no_desc     0.00199    0.819     0.757       6815 BBA_00007, BBA_0…

## Species: beau
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 337
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 2
## 
## # A tibble: 2 x 7
##   annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name       
##   <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>           
## 1 PF08659    KR            0.0170    0.021     0.007         60 BBA_01241, BBA_…
## 2 PF13561    adh_short_…   0.0198    0.021     0.008         68 BBA_00446, BBA_…

## Species: beau
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 218
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 2
## 
## # A tibble: 2 x 7
##   annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name        
##   <chr>      <chr>         <dbl>    <dbl>     <dbl>      <int> <chr>            
## 1 "PF00076"  RRM_1       0.00656    0.023     0.006         55 BBA_00301, BBA_0…
## 2 ""         no_desc     0.00943    0.832     0.757       6815 BBA_00129, BBA_0…

Ophio-cflo - 24h

for (i in 1:n_clusters){
  
  writeLines(paste0("Species: ", sample.name[[2]], "\n", "24h-rhythmic genes, Cluster: ", i))
  
  # Summary
  genes <- my_gene_col[[2]] %>% rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
  writeLines(paste0("n(genes) = ", length(genes),"\n"))
  
  # Enrichment
  overrepresented.terms <-
    genes %>% 
      check_enrichment(.,
                    function.dir = path_to_repo,
                    what = "pfams",
                    org = sample.name[[2]],
                    bg = expressed[[2]],
                    filter = T,
                    plot = F)
  writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
  print(overrepresented.terms %>% as_tibble())
  
  # Stacked zplot
      stacked.plot1 <- genes %>% stacked.zplot_tc6(cond = sampleName[[1]]) %>% pluck(1)
      stacked.plot2 <- genes %>% stacked.zplot_tc6(cond = sampleName[[2]]) %>% pluck(1)
      ggpubr::ggarrange(plotlist=list(stacked.plot1, stacked.plot2),
                    nrow = 1, ncol = 2,
                    widths = c(1,1), labels = NA) %>% 
        print()
  
}
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 833
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 25
## 
## # A tibble: 25 x 7
##    annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name      
##    <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>          
##  1 PF00512    HisKA        0.00003    0.01      0.002          7 GQ602_001137, …
##  2 PF00072    Response_r…  0.00008    0.012     0.002         10 GQ602_001137, …
##  3 PF00632    HECT         0.00008    0.009     0.001          6 GQ602_000948, …
##  4 PF00664    ABC_membra…  0.00038    0.014     0.003         15 GQ602_000447, …
##  5 PF14604    SH3_9        0.00066    0.012     0.003         13 GQ602_000765, …
##  6 PF07653    SH3_2        0.00259    0.01      0.003         12 GQ602_000765, …
##  7 PF00018    SH3_1        0.00337    0.012     0.004         16 GQ602_000765, …
##  8 PF02518    HATPase_c    0.00337    0.01      0.003         13 GQ602_001137, …
##  9 PF02801    Ketoacyl-s…  0.00337    0.01      0.003         13 GQ602_002162, …
## 10 PF00109    ketoacyl-s…  0.00498    0.01      0.003         14 GQ602_002162, …
## # … with 15 more rows

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 465
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 2
## 
## # A tibble: 2 x 7
##   annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name       
##   <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>           
## 1 PF04082    Fungal_tra…  0.00094    0.039     0.013         54 GQ602_000498, G…
## 2 PF00172    Zn_clus      0.0468     0.036     0.017         75 GQ602_000498, G…

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 354
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 0
## 
## # A tibble: 0 x 7
## # … with 7 variables: annot_term <chr>, annot_desc <chr>, adj_pVal <dbl>,
## #   sam_freq <dbl>, back_freq <dbl>, n_annot_bg <int>, gene_name <chr>

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 633
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 4
## 
## # A tibble: 4 x 7
##   annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name        
##   <chr>      <chr>         <dbl>    <dbl>     <dbl>      <int> <chr>            
## 1 PF00118    Cpn60_TCP1  0          0.024     0.002         10 GQ602_000364, GQ…
## 2 PF00076    RRM_1       0          0.058     0.014         60 GQ602_000255, GQ…
## 3 PF00004    AAA         0.00033    0.034     0.01          45 GQ602_000605, GQ…
## 4 PF00400    WD40        0.0168     0.041     0.021         91 GQ602_000228, GQ…

reducing redundant GO terms

Note to self: If we use GO terms instead of pfams, we will need to reduce the redundant GO annotations that is present in fungal annotation data.

Two options to do this:

Option #1: Get the list of overrepresented GO terms and their associated p-values and use REVIGO portal online to reduce the redundant terms

Option #2: Use the scripts provided by REVIGO to programmatically run REVIGO using bash/R. For more information see (here)[http://revigo.irb.hr/FAQ.aspx#q07] Status: tried running it via bash, and it didn’t work; NEED TO FIGURE IT OUT.


3. Beau v. Ophio - 24h-rhy

Next, we compare the homologous genes in both the fungi to understand if the rhythmic genes (and processes) in the two fungi are similar or not; also, is there any differences in the daily expression of these genes between the two fungal parasites?

Obtain homology data

# Read the source file
homology.file <- "ophio_beau_homology.csv"
homology.file <- 
  paste0(path_to_repo, "/results/proteinortho/", homology.file) %>% 
  read.csv(., stringsAsFactors = F, na.strings = c(" ","","NA"))

# Clean the source file to keep distinct gene-gene homologs
homology.dat <-
  homology.file %>% 
  # names() %>% 
  select(ophio_gene, beau_gene) %>% 
  na.omit() %>% 
  distinct() %>% 
  group_by(beau_gene) %>% 
  filter(n()==1) %>% 
  select(beau_gene, ophio_gene)

writeLines(paste("Of the", length(expressed[[2]]), "genes expressed in Ophio-cflo,",
                 "and", length(expressed[[1]]), "genes expressed in Beau,\n",
                 nrow(homology.dat), "genes show one-to-one orthology"))
## Of the 6998 genes expressed in Ophio-cflo, and 9006 genes expressed in Beau,
##  5274 genes show one-to-one orthology

Summarize the results

for (i in 1:2){
  
  # exp.dat <- expressed[[i]]
  rhy.dat <- rhy[[i]]
  ortho.dat <- homology.dat %>% pull(i) 

  listInput <- list(rhy.dat, ortho.dat)
  names(listInput) <- c(paste0(sample.name[[i]], c("_rhy24","_ortho")))
  
  library(UpSetR)
  library(viridis)
  # caste.col <- c("#F23030","#1A80D9")
  upset(fromList(listInput), 
        number.angles = 0, point.size = 3, line.size = 1.5, 
        mainbar.y.label = "Number of overlapping genes", 
        sets.x.label = "Sig. rhy genes", 
        text.scale = c(1.5, # y-axis label ("# overlapping genes")
                       2, # y-axis tick labels ("1000, 2000,..")
                       1.5, # label for histogram ("sig. rhy genes")
                       1, # tick labels for histogram
                       1.5, # set names ("Cflo-brain_08h,..") 
                       1.5),
        sets = names(listInput),
        nintersects = 15,
        keep.order = T,
        sets.bar.color = viridis(1),
        # adding queries
        query.legend = "bottom"
        ) %>% 
    print()
  
  writeLines(paste0(
    "The above plot shows that of the ", length(rhy.dat), " rhythmic genes in ", sample.name[[i]],
    ", how many has an 1:1 ortholog in the other fungal species. \n",
    "Note, orthologous genes were identified using proteinortho "))
}

## The above plot shows that of the 1872 rhythmic genes in beau, how many has an 1:1 ortholog in the other fungal species. 
## Note, orthologous genes were identified using proteinortho

## The above plot shows that of the 2285 rhythmic genes in ophio_cflo, how many has an 1:1 ortholog in the other fungal species. 
## Note, orthologous genes were identified using proteinortho

Rhy24 genes w/ orthologs

Next, I wanted to see if certain gene clusters show a characteristic difference in their daily expression in a species-specific manner (meaning, same genes but different daily expression in the two fungal species?).

To do so, I performed hierarchical clustering of all orthologous genes that show sig. 24h rhythms in EITHER Ocflo OR Beau to identify clusters of genes that show a synchronized expression in each of the two fungal species.

STOPPED HERE!!!

rhy.homology.dat <- 
  homology.dat %>% 
  filter(beau_gene %in% rhy[[1]] | ophio_gene %in% rhy[[2]])

Hierarchical clustering

### Make the dataframe for plotting
zscore.rhy.homology.dat <-
  zscore.dat[[1]] %>% 
  filter(gene_name %in% rhy.homology.dat[[1]]) %>% 
  rename_at(vars(starts_with("ZT")), ~ (gsub("A", "B", .x, fixed = TRUE))) %>% # fix colnames for beau
  # add ophio homologs for the beau genes
  left_join(rhy.homology.dat, by=c("gene_name" = "beau_gene")) %>%
  # remove the beau names and keep the ophio names only
  select(-1) %>% 
  select(gene_name = ophio_gene, everything()) %>% 
  # join ophio-cflo data
  left_join(zscore.dat[[2]], by="gene_name") %>%
  # drop any genes without expression values (NA)
  na.omit() %>% 
  as.data.frame() %>% 
  # set genes as rownames
  column_to_rownames("gene_name")

# Set genes as rownames and convert it into a matrix
# rownames(zscore.rhy.homology.dat) = zscore.rhy.homology.dat$gene_name
zscore.rhy.homology.dat <- as.matrix(zscore.rhy.homology.dat)


# Hierarchical clustering of the genesets
my_hclust_gene <- hclust(dist(zscore.rhy.homology.dat), method = "complete")


# Make annotations for the heatmaps
n_clusters <- 4
my_clusters <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # k=  clusters
my_gene_col <- data.frame(cluster = my_clusters)


# I’ll add some column annotations and create the heatmap.
# Annotations for:
# 1. Is the sample collected during the light or dark phase? 
my_sample_col <- data.frame(phase = rep(rep(c("light", "dark", "light"),c(5,6,1)),2),
                            conds = rep(c("beau", "ophio_cflo"), each=12))
row.names(my_sample_col) <- colnames(zscore.rhy.homology.dat)

# Manual color palette
my_colour = list(
  phase = c(light = "#F2E18D", dark = "#010440"),
  conds = c(beau = "#5A829F", ophio_cflo = "#AD212F"),
  cluster = viridis::cividis(100)[c(seq(10,80,by=round(80/(n_clusters), 0)))])

# Color scale
my.breaks = seq(min(zscore.rhy.homology.dat), max(zscore.rhy.homology.dat), by=0.1)
# my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)

# Let's plot!
pheatmap(zscore.rhy.homology.dat, show_rownames = F, show_colnames = F,
           annotation_row = my_gene_col, 
           annotation_col = my_sample_col,
           cutree_rows = n_clusters, # OG was 4
           # cutree_cols = 4,
           gaps_col = c(12,24),
           annotation_colors = my_colour,
           border_color=FALSE,
           cluster_cols = F,
           breaks = my.breaks,
           ## color scheme borrowed from: 
           color = inferno(length(my.breaks) - 1),
           treeheight_row = 0,
           # treeheight_col = 0,
           # remove the color scale or not
           main = paste0("24h-rhythmic in Ocflo or Beau \n (n=",
                         nrow(zscore.rhy.homology.dat), " orthologous genes)"),
           ## annotation legend
           annotation_legend = T,
           ## Color scale
           legend = T)

Individual clusters

sampleName <- c("ophio_cflo","ophio_ophio-infected")

for (j in 1:2) {
  
  for (i in 1:n_clusters){
    
    writeLines(paste0("Species: ", sample.name[[j]], "\n", "24h-rhythmic genes, Cluster: ", i))
    
    # Summary
    genes <- my_gene_col %>% rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
    writeLines(paste0("n(genes) = ", length(genes),"\n"))
    
    # define the background geneset for enrichment analysis
    bg.genes <- homology.dat %>% pull(ophio_gene) %>% unique()
    
    ## Transform gene names (ophio -> beau) and refine background geneset
    if (j == 1) {
      genes <- 
        homology.dat %>% 
        filter(ophio_gene %in% genes) %>% 
        pull(beau_gene)
      bg.genes <- homology.dat %>% pull(beau_gene) %>% unique()
    }
    
    # Enrichment
    overrepresented.terms <-
      genes %>% 
        check_enrichment(.,
                      function.dir = path_to_repo,
                      what = "pfams",
                      org = sample.name[[j]],
                      bg = expressed[[j]],
                      filter = T,
                      plot = F)
    print(overrepresented.terms)
    writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
    
    # Stacked zplot
    if (j==2) {
      # Stacked zplot
      stacked.plot1 <- genes %>% stacked.zplot_tc6(cond = sampleName[[1]]) %>% pluck(1)
      stacked.plot2 <- genes %>% stacked.zplot_tc6(cond = sampleName[[2]]) %>% pluck(1)
      ggpubr::ggarrange(plotlist=list(stacked.plot1, stacked.plot2),
                    nrow = 1, ncol = 2,
                    widths = c(1,1), labels = NA) %>% 
        print()
    } else {
    genes %>% 
    stacked.zplot_tc6(cond = sample.name[[j]]) %>% 
      multi.plot(rows = 1, cols = 1)
    }
  }
}
## Species: beau
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 718
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 7 x 7
##   annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name       
##   <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>           
## 1 "PF01248"  Ribosomal_…  0          0.007     0.001          5 BBA_01098, BBA_…
## 2 ""         no_desc      0          0.893     0.757       6815 BBA_00007, BBA_…
## 3 "PF00118"  Cpn60_TCP1   0          0.01      0.001         10 BBA_00188, BBA_…
## 4 "PF00226"  DnaJ         0.00016    0.011     0.003         23 BBA_00157, BBA_…
## 5 "PF01423"  LSM          0.00042    0.008     0.002         16 BBA_00156, BBA_…
## 6 "PF00076"  RRM_1        0.00103    0.017     0.006         55 BBA_00301, BBA_…
## 7 "PF00227"  Proteasome   0.00108    0.007     0.002         14 BBA_01972, BBA_…
## 
## n(overrepresented terms) = 7

## Species: beau
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 1026
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 31 x 7
##    annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name      
##    <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>          
##  1 ""         no_desc     0           0.844     0.757       6815 BBA_00011, BBA…
##  2 "PF07714"  Pkinase_Tyr 0           0.035     0.012        105 BBA_00011, BBA…
##  3 "PF00069"  Pkinase     0           0.035     0.014        124 BBA_00011, BBA…
##  4 "PF03810"  IBN_N       0           0.007     0.001          8 BBA_00780, BBA…
##  5 "PF00454"  PI3_PI4_ki… 0.00002     0.006     0.001          8 BBA_02054, BBA…
##  6 "PF13855"  LRR_8       0.00002     0.005     0.001          6 BBA_00407, BBA…
##  7 "PF00168"  C2          0.00002     0.007     0.001         11 BBA_00011, BBA…
##  8 "PF00018"  SH3_1       0.000150    0.009     0.002         20 BBA_00388, BBA…
##  9 "PF14604"  SH3_9       0.00019     0.008     0.002         17 BBA_00388, BBA…
## 10 "PF00176"  SNF2_N      0.0002      0.009     0.002         21 BBA_00577, BBA…
## # … with 21 more rows
## 
## n(overrepresented terms) = 31

## Species: beau
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 451
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 1 x 7
##   annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name        
##   <chr>      <chr>         <dbl>    <dbl>     <dbl>      <int> <chr>            
## 1 ""         no_desc           0    0.852     0.757       6815 BBA_00012, BBA_0…
## 
## n(overrepresented terms) = 1

## Species: beau
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 324
## 
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 8 x 7
##   annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name        
##   <chr>      <chr>         <dbl>    <dbl>     <dbl>      <int> <chr>            
## 1 ""         no_desc    0           0.898     0.757       6815 BBA_00042, BBA_0…
## 2 "PF00004"  AAA        0.000070    0.028     0.006         50 BBA_00380, BBA_0…
## 3 "PF00025"  Arf        0.00248     0.015     0.003         31 BBA_01574, BBA_0…
## 4 "PF08477"  Roc        0.00248     0.015     0.003         31 BBA_01574, BBA_0…
## 5 "PF00071"  Ras        0.00248     0.015     0.004         33 BBA_01574, BBA_0…
## 6 "PF01926"  MMR_HSR1   0.00248     0.015     0.004         33 BBA_01574, BBA_0…
## 7 "PF00076"  RRM_1      0.00773     0.019     0.006         55 BBA_00117, BBA_0…
## 8 "PF08659"  KR         0.0438      0.015     0.007         60 BBA_01241, BBA_0…
## 
## n(overrepresented terms) = 8

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 718
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 10 x 7
##    annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name      
##    <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>          
##  1 PF01248    Ribosomal_… 0           0.017     0.001          5 GQ602_001149, …
##  2 PF00118    Cpn60_TCP1  0           0.024     0.002         10 GQ602_000364, …
##  3 PF00226    DnaJ        0.00005     0.027     0.005         23 GQ602_000387, …
##  4 PF00076    RRM_1       0.000140    0.044     0.014         60 GQ602_000296, …
##  5 PF01423    LSM         0.000140    0.02      0.004         16 GQ602_000162, …
##  6 PF00227    Proteasome  0.00048     0.017     0.003         14 GQ602_000370, …
##  7 PF00153    Mito_carr   0.0225      0.02      0.008         35 GQ602_001581, …
##  8 PF13649    Methyltran… 0.0273      0.02      0.009         37 GQ602_001375, …
##  9 PF08241    Methyltran… 0.0283      0.02      0.009         38 GQ602_001375, …
## 10 PF00400    WD40        0.0487      0.037     0.021         91 GQ602_000262, …
## 
## n(overrepresented terms) = 10

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 1026
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 34 x 7
##    annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name      
##    <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>          
##  1 PF07714    Pkinase_Tyr  0          0.058     0.02          86 GQ602_000284, …
##  2 PF00069    Pkinase      0          0.058     0.021         90 GQ602_000284, …
##  3 PF00664    ABC_membra…  0.00001    0.015     0.003         15 GQ602_000061, …
##  4 PF00168    C2           0.00002    0.012     0.003         11 GQ602_000284, …
##  5 PF00172    Zn_clus      0.00003    0.041     0.017         75 GQ602_000498, …
##  6 PF00454    PI3_PI4_ki…  0.0001     0.009     0.002          8 GQ602_001647, …
##  7 PF03810    IBN_N        0.0001     0.009     0.002          8 GQ602_002695, …
##  8 PF14531    Kinase-like  0.00029    0.012     0.003         14 GQ602_000284, …
##  9 PF00512    HisKA        0.0004     0.008     0.002          7 GQ602_001137, …
## 10 PF03372    Exo_endo_p…  0.0004     0.008     0.002          7 GQ602_002242, …
## # … with 24 more rows
## 
## n(overrepresented terms) = 34

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 451
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
##   annot_term annot_desc  adj_pVal sam_freq back_freq n_annot_bg gene_name       
##   <chr>      <chr>          <dbl>    <dbl>     <dbl>      <int> <chr>           
## 1 PF07690    MFS_1         0.0113    0.052     0.022         95 GQ602_000016, G…
## 2 PF00083    Sugar_tr      0.0474    0.028     0.013         55 GQ602_000016, G…
## 3 PF13450    NAD_bindin…   0.0474    0.02      0.008         36 GQ602_000482, G…
## 4 PF00400    WD40          0.0474    0.04      0.021         91 GQ602_000116, G…
## 
## n(overrepresented terms) = 4

## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 324
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 8 x 7
##   annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name        
##   <chr>      <chr>         <dbl>    <dbl>     <dbl>      <int> <chr>            
## 1 PF00004    AAA         0.0001     0.052     0.01          45 GQ602_001710, GQ…
## 2 PF08477    Roc         0.00595    0.029     0.007         31 GQ602_001942, GQ…
## 3 PF00025    Arf         0.00595    0.029     0.008         33 GQ602_001942, GQ…
## 4 PF00071    Ras         0.00595    0.029     0.008         33 GQ602_001942, GQ…
## 5 PF01926    MMR_HSR1    0.0119     0.029     0.009         39 GQ602_001353, GQ…
## 6 PF00076    RRM_1       0.0227     0.035     0.014         60 GQ602_000255, GQ…
## 7 PF08659    KR          0.0227     0.029     0.011         47 GQ602_002426, GQ…
## 8 PF00106    adh_short   0.0299     0.029     0.012         51 GQ602_002426, GQ…
## 
## n(overrepresented terms) = 8

Overlap b/n clusters and rhy24

## Visualize the overlap

cluster.dat <- list()
for (i in 1:n_clusters) {
    cluster.dat[[i]] <- my_gene_col %>% 
rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
}
names(cluster.dat) <- paste0("Cluster_",1:4)

for (j in 1:2) {
    
    rhy.dat <- rhy[[j]]
    cluster.dat.dummy <- cluster.dat
    
    if (j == 1) {
      for (i in 1:n_clusters) {
        cluster.dat.dummy[[i]] <- 
            homology.dat %>% 
            filter(ophio_gene %in% cluster.dat.dummy[[i]]) %>% 
            pull(beau_gene)
      }
    }
    
    listInput <- list(rhy.dat, 
                      cluster.dat.dummy[[1]], cluster.dat.dummy[[2]], 
                      cluster.dat.dummy[[3]], cluster.dat.dummy[[4]])
    names(listInput) <- c(paste0(sample.name[[j]], c("_rhy24")), paste0("cluster_",1:4))
    
    library(UpSetR)
    library(viridis)
    # caste.col <- c("#F23030","#1A80D9")
    upset(fromList(listInput), 
          number.angles = 0, point.size = 3, line.size = 1.5, 
          mainbar.y.label = "Number of overlapping genes", 
          sets.x.label = "Sig. rhy genes", 
          text.scale = c(1.5, # y-axis label ("# overlapping genes")
                         2, # y-axis tick labels ("1000, 2000,..")
                         1.5, # label for histogram ("sig. rhy genes")
                         1, # tick labels for histogram
                         1.5, # set names ("Cflo-brain_08h,..") 
                         1.5),
          sets = names(listInput),
          nintersects = 15,
          keep.order = T,
          sets.bar.color = viridis(1),
          # adding queries
          query.legend = "bottom"
          ) %>% 
      print()
  
}

It seems that majority of the genes in Cluster 3 and 4 are sig. rhythmic in Ophio but not in Beau. We will perform the pairwise Fisher’s exact test to find out. Let’s dig in!

NOTE: We need to think about the best way to perform the Fisher’s exact test. For starters, I am transforming all the gene names to Ophio

# LIST ONE - Cluster identity
list1 <- cluster.dat
names(list1) <- names(cluster.dat)

## LIST TWO - ophio rhythmic genes
beau.ortho.rhy <- homology.dat %>% filter(beau_gene %in% rhy[[1]]) %>% pull(ophio_gene) %>% unique() 
ocflo.ortho.rhy <- homology.dat %>% filter(ophio_gene %in% rhy[[2]]) %>% pull(ophio_gene) %>% unique()
list2 <- list(beau.ortho.rhy, ocflo.ortho.rhy)
names(list2) <- paste0(sample.name[1:2], c("_24h"))

## CHECK FOR OVERLAP
library(GeneOverlap)

# define the background geneset 
# in our case, it would be the number of orthologous genes between beau and Ophio_cflo
nGenes = homology.dat %>% pull(ophio_gene) %>% unique() %>% length()

## make a GOM object
gom.1v2 <- newGOM(list1, list2, genome.size = nGenes)
png(paste0(path_to_repo, "/results/figures/BD/ocflo_beau_orthologs_rhy_overlap.png"),
    width = 15, height = 15, units = "cm", res = 300)
drawHeatmap(gom.1v2,
              adj.p=T,
              cutoff=0.01,
              what="odds.ratio",
              # what="Jaccard",
              log.scale = T,
              note.col = "grey60")
trash <- dev.off()
Orthologous rhy24 genes

Orthologous rhy24 genes

As we predicted, Cluster 3 and 4 genes show a stronger overlap with 24h-rhythmic genes in O. cflo as comapred to Beauveria (as can be seen from both log2-odds ratio and the associated q-value). The signal is strongest for Cluster 4, so let’s see which genes are in this cluster.

Cluster-4 genes

## Get the annotation data
ocflo.annots <- read.csv(paste0(path_to_repo, "/data/ophio_cflo_TC6_data.csv"), stringsAsFactors = F) %>% as.tibble()
ocflo.annots %<>% 
  filter(expressed=="yes") %>%
  select(gene_name = gene_ID_ncbi, gene_ID_robin, blast_annot, GammaP_24h, GOs:ophio_kim_homolog)

## Subset the gene_names
ocflo.annots %>%
  filter(gene_name %in% cluster.dat[[4]]) %>%

  # plot the q-values
  ggplot() +
  geom_density(aes(x=GammaP_24h)) +
  labs(x="rhythmicity, 24h (q-value)") +
  scale_x_continuous(breaks = c(0,0.05, 0.1, 0.5, 1)) +
  theme_Publication()

  # which genes?
  # filter(!is.na(GOs)) %>% 
  # view()


## Check these genes for other annotations (signalP, SSP, TMHMM)
# LIST ONE - Cluster identity
list1 <- cluster.dat
names(list1) <- names(cluster.dat)

## LIST TWO - ophio rhythmic genes
signalP <- ocflo.annots %>% filter(signalP == "yes") %>% pull(gene_name)
SSP <- ocflo.annots %>% filter(SSP == "yes") %>% pull(gene_name)
TMHMM <- ocflo.annots %>% filter(TMHMM == "yes") %>% pull(gene_name)
list2 <- list(signalP, SSP, TMHMM)
names(list2) <- paste0(sample.name[[2]], "-", c("signalP", "SSP", "TMHMM"))

## CHECK FOR OVERLAP
library(GeneOverlap)

# define the background geneset 
# in our case, it would be the number of orthologous genes between beau and Ophio_cflo
nGenes = ocflo.annots %>% nrow() 

## make a GOM object
gom <- newGOM(list1, list2, genome.size = nGenes)
png(paste0(path_to_repo, "/results/figures/BD/ocflo_beau_orthologs_annots_overlap.png"),
    width = 15, height = 15, units = "cm", res = 300)
drawHeatmap(gom,
              adj.p=T,
              cutoff=0.01,
              what="odds.ratio",
              # what="Jaccard",
              log.scale = T,
              note.col = "grey60")
trash <- dev.off()
Overlap of orthologous rhy24 gene clusters with additional annotations

Overlap of orthologous rhy24 gene clusters with additional annotations

4. Ophio DEGs during infection

Do prep for analyses

Prepare the functions, libraries required

# Let's load functions for running limorhyde
source(system.file('extdata', 'vignette_functions.R', package = 'limorhyde'))
# Let's load the libraries required for running Limorhyde
# library('annotate')
library('data.table')
library('foreach')
# library('GEOquery')
library('ggplot2')
library('knitr')
library('limma')
library('limorhyde')

conflict_prefer("union", "dplyr")

Format metadata

Create dataframe with metadata information for the different samples collected

sampleName <- c("ophio_cflo","ophio_ophio-infected")
short.name <- c("AC","AI") # AC = arb2-control, AI = arb2-infection
time.points <- c(2,4,6,8,10,12,14,16,18,20,22,24)
light.dark <- c(rep("light",times=5), rep("dark",times=6), rep("light", times=1))

meta <- data.frame(title = paste0(rep(sampleName, each=12),"_ZT",time.points),
                       sample = paste0(rep(time.points, times=2),rep(short.name, each=12)),
                       genotype = rep(sampleName, each=12),
                       time = rep(time.points, times=2),
                       cond = rep(sampleName, each=12),
                       LD = rep(light.dark, times=2),
                       stringsAsFactors = F)

meta %>% glimpse()
## Observations: 24
## Variables: 6
## $ title    <chr> "ophio_cflo_ZT2", "ophio_cflo_ZT4", "ophio_cflo_ZT6", "ophio…
## $ sample   <chr> "2AC", "4AC", "6AC", "8AC", "10AC", "12AC", "14AC", "16AC", …
## $ genotype <chr> "ophio_cflo", "ophio_cflo", "ophio_cflo", "ophio_cflo", "oph…
## $ time     <dbl> 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2, 4, 6, 8, 10, …
## $ cond     <chr> "ophio_cflo", "ophio_cflo", "ophio_cflo", "ophio_cflo", "oph…
## $ LD       <chr> "light", "light", "light", "light", "light", "dark", "dark",…

Now, format the metadata.

### 1.1.1 Format the meta-data ----------------
# load the meta-data
sm <- meta
# Let's format the columns in the right data-type
sm$time <- as.numeric(sm$time)
# sm$batch <- as.factor(sm$batch)
sm$LD <- as.factor(sm$LD)
# sm$location <- as.factor(sm$location)

# Let's get a glimpse of the metadata
sm %>% as_tibble() %>% head()
## # A tibble: 6 x 6
##   title           sample genotype    time cond       LD   
##   <chr>           <chr>  <chr>      <dbl> <chr>      <fct>
## 1 ophio_cflo_ZT2  2AC    ophio_cflo     2 ophio_cflo light
## 2 ophio_cflo_ZT4  4AC    ophio_cflo     4 ophio_cflo light
## 3 ophio_cflo_ZT6  6AC    ophio_cflo     6 ophio_cflo light
## 4 ophio_cflo_ZT8  8AC    ophio_cflo     8 ophio_cflo light
## 5 ophio_cflo_ZT10 10AC   ophio_cflo    10 ophio_cflo light
## 6 ophio_cflo_ZT12 12AC   ophio_cflo    12 ophio_cflo dark
# Next we use limorhyde to calculate time_cos and time_sin, which are based on the first
#harmonic of a Fourier decomposition of the time column, and append them to the sm data frame.
sm = cbind(sm, limorhyde(sm$time, 'time_'))
# convert the dataframe into a data.table
sm <- data.table(sm)
# check that it worked
sm[1:5, ]
##              title sample   genotype time       cond    LD      time_cos
## 1:  ophio_cflo_ZT2    2AC ophio_cflo    2 ophio_cflo light  8.660254e-01
## 2:  ophio_cflo_ZT4    4AC ophio_cflo    4 ophio_cflo light  5.000000e-01
## 3:  ophio_cflo_ZT6    6AC ophio_cflo    6 ophio_cflo light  6.123234e-17
## 4:  ophio_cflo_ZT8    8AC ophio_cflo    8 ophio_cflo light -5.000000e-01
## 5: ophio_cflo_ZT10   10AC ophio_cflo   10 ophio_cflo light -8.660254e-01
##     time_sin
## 1: 0.5000000
## 2: 0.8660254
## 3: 1.0000000
## 4: 0.8660254
## 5: 0.5000000

load data

## DATASET 1
## Load the control O.cflo data (from TC6)
ocflo.control.dat <-
  data.db %>% 
  tbl(., paste0(sampleName[[1]], "_fpkm")) %>% 
  select(gene_name, everything()) %>%
  collect()

## DATASET 2
## Load the O.cflo infection data from the mixed transcriptomics study (from TC7)
inf.db <- dbConnect(RSQLite::SQLite(),
                   paste0(path_to_repo,"/../Das_et_al_2022b/data/databases/TC7_data.db"))
# src_dbi(inf.db)
# extract the (gene-expr X time-point) data
ocflo.inf.dat <-
  inf.db %>%
  tbl(., paste0(sampleName[[2]], "_fpkm")) %>%
  select(gene_name, everything()) %>%
  collect()

filter data

The goal is to use only the genes that show expression (>1 FPKM) for at least half of the timepoints during the 24h day (i.e., 6 of the 12 timepoints).

## DATASET 1
n.exp.1 <- apply(ocflo.control.dat[-1], 1, function(x) sum(x>=1))
ocflo.control.dat <- ocflo.control.dat[which(n.exp.1>=6),]
colnames(ocflo.control.dat)[-1] <- paste0("ZT", meta[meta$cond==sampleName[[1]],] %>% pull(sample))

## DATASET 2
n.exp.2 <- apply(ocflo.inf.dat[-1], 1, function(x) sum(x>=1))
ocflo.inf.dat <- ocflo.inf.dat[which(n.exp.2>=6),]
colnames(ocflo.inf.dat)[-1] <- paste0("ZT", meta[meta$cond==sampleName[[2]],] %>% pull(sample))

## Use the genes that are expressed in both conditions
emat <-
  ocflo.control.dat %>% 
  filter(gene_name %in% ocflo.inf.dat$gene_name) %>% 
  left_join(ocflo.inf.dat, by="gene_name") %>% 
  as.data.frame()

Run analyses

Next, let’s perform the DEG analyses for the ophio-cflo (halfway through infection v. controls)

### Convert to a matrix

# save gene names as row names
rownames(emat) <- emat[,1]
emat <- emat[,-1]
# Need to make the emat into a matrix.
emat <- data.matrix(emat)
# log2 transform the data
emat <- log2(emat + 1)


### Set thresholds
# Set threshold for q-value and log2FC
q.threshold <- 0.05 # currently, using 5% FDR (BH adjusted p-value)
log2.foldchange <- 1 # thus, any gene with a 2^(log2.foldchange) fold change in it's expression

### Format the metadata, if necessary
# Filter the metadata according to your comparison
sm.sub <- sm %>% filter(cond %in% c(sampleName))
# Define the cond column as a factor
sm.sub$cond <- as.factor(sm.sub$cond)

### Let's run the DEG analyses
# Use the subsetted emat to find DEGs
design.deg = model.matrix(~ cond + time_cos + time_sin, data = sm.sub)
#
fit = lmFit(emat, design.deg)
fit = eBayes(fit, trend = TRUE)
# Take a look at the coefficients table
# fit$coefficients %>% head()
#
deLimma.deg = data.table(topTable(fit, coef = 2, number = Inf), keep.rownames = TRUE)
setnames(deLimma.deg, 'rn', 'gene_name')
deLimma.deg[, adj.P.Val := p.adjust(P.Value, method = 'BH')]
setorderv(deLimma.deg, 'adj.P.Val')

### Annotate the results
# Annotate the results to indicate the significant genes
all.DEGs <-
  deLimma.deg %>% 
  arrange(desc(abs(logFC)), adj.P.Val) %>% 
  mutate(sig = as.factor(ifelse(adj.P.Val < q.threshold & abs(logFC) >= log2.foldchange, "yes", "no"))) %>% 
  mutate(inf_v_control = as.factor(ifelse(sig=="yes", ifelse( logFC > 0, "up", "down" ), "NA"))) %>% 
  mutate(inf = sampleName[[2]])

### Summarize the results
writeLines(paste0("\nControl-", sampleName[[1]], " v. ", sampleName[[2]], "\n--Results of DEG analysis--"))
## 
## Control-ophio_cflo v. ophio_ophio-infected
## --Results of DEG analysis--
## How many DEGs - 5% FDR and ≥ 1 fold change in gene expression
all.DEGs %>% 
  # filter(adj.P.Val < q.threshold) %>%
  # filter(abs(logFC) >= 2) %>% # change the criteria here for top DEG or all DEG (logFC≥1)
  filter(sig == "yes") %>% 
  
  # pull(gene_name) %>% 
  
  group_by(inf_v_control) %>% 
  summarise(n_genes = n()) %>% 
  as.data.frame() %>% 
    ## n = 81 up- and 141 down-regulated genes in Cflo heads during Ophio-infection 
    ## (at 5% FDR; log2-fold-change ≥ 1) 
  print()
##   inf_v_control n_genes
## 1          down     395
## 2            up     318
### Subset to keep only sig. DEGs
sig.DEGs <- all.DEGs %>% filter(sig=="yes")

Visualize results

# Volcano plot 
library(viridis)

ggplot(all.DEGs) +
  # geom_hline(yintercept = -log10(0.05), col="red", alpha=0.6) +
  # geom_vline(xintercept = c(-2,2), col="grey60", alpha=0.75) +
  geom_point(aes(x = logFC, y = -log10(adj.P.Val), color=sig), size = 1.5, alpha = 0.5) +
  labs(x = expression(log[2]*' fold-change (inf_v_control)'), 
       y = expression(-log[10]*' '*q[DE]),
       title = "O.cflo (infection v. control)",
       color = "significant") +
  # scale_x_continuous(limits = c(-5,3),
  #                    breaks = c(-5,-4,-3,-2,-1,0,1,2,3),
  #                    labels = c("-5","","-3","","-1","","1","","3")) +
  # xlim(c(-50,50)) +
  theme_Publication() +
  scale_color_viridis(discrete = T, direction = -1, option = "viridis")

Load manip data

## Load the ophio DEG (at manipulation) data from Will et al. 2020
will2020_data <- read.csv(paste0(path_to_repo,
                                 "/data/input/ophio_cflo/complete_annotations/FullBlast_EC05_RNAseq_orignal_copy_26Aug19.csv"),
                          stringsAsFactors = F)
will2020_data %<>% 
  as_tibble() %>% 
  filter(sample_1=="Alive" & sample_2=="Fungus") %>%
  select(arb2_gene, logFC = log2.fold_change., q_value, significant) %>% 
  mutate(logFC=as.numeric(logFC), q_value=as.numeric(q_value)) %>%
  filter(significant=="yes") %>% 
  mutate(up_down = ifelse(logFC > 0, "down", "up")) %>%
  mutate(logFC = -1*logFC) %>% 
  na.omit()

### Change ophio gene names to ncbi IDs
will2020_data %<>% 
  left_join(ocflo.annots[1:3], by=c("arb2_gene"="gene_ID_robin")) %>%
  select(-1) %>% 
  select(gene_name, blast_annot, everything())

5. Overlap between DEGs during infection v. manipulation

### Subset the up/down-regulated genes
### At halfway-through disease progression
inf.up <- sig.DEGs %>% filter(inf_v_control=="up") %>% pull(gene_name)
inf.down <- sig.DEGs %>% filter(inf_v_control=="down") %>% pull(gene_name)
### At active manipulation
manip.up <- will2020_data %>% filter(up_down=="up") %>% pull(gene_name)
manip.down <- will2020_data %>% filter(up_down=="down") %>% pull(gene_name)


### Visualize the results
listInput <- list(inf.up, inf.down, manip.up, manip.down)
names(listInput) <- c(paste0("inf_",c("up","down")), paste0("manip_", c("up","down")))
    
library(UpSetR)
library(viridis)
upset(fromList(listInput), 
  number.angles = 0, point.size = 3, line.size = 1.5, 
  mainbar.y.label = "Number of overlapping genes", 
  sets.x.label = "Sig. DE genes", 
  text.scale = c(1.5, # y-axis label ("# overlapping genes")
                 2, # y-axis tick labels ("1000, 2000,..")
                 1.5, # label for histogram ("sig. rhy genes")
                 1, # tick labels for histogram
                 1.5, # set names ("Cflo-brain_08h,..") 
                 1.5),
  sets = names(listInput),
  nintersects = 15,
  keep.order = T,
  sets.bar.color = viridis(1),
  # adding queries
  query.legend = "bottom"
  ) %>% 
print()

### Test significance of overlap
list1 <- list(inf.up, inf.down)
names(list1) <- paste0("inf_",c("up","down"))
list2 <- list(manip.up, manip.down)
names(list2) <- paste0("manip_", c("up","down"))
bg.genes <- all.DEGs %>% nrow()

overlap <- check_overlap(list1, list2, bg.genes)

## $rowInd
## [1] 2 1
## 
## $colInd
## [1] 1 2
## 
## $call
## heatmap.2(x = plot.mat, Rowv = NA, Colv = NA, dendrogram = "none", 
##     scale = "none", col = brewer.pal(ncolused, grid.col), colsep = col_sep, 
##     rowsep = row_sep, sepcolor = "white", sepwidth = c(0.002, 
##         0.002), cellnote = note.mat, notecex = 1.6, notecol = note.col, 
##     trace = "none", margins = margins_use, cexRow = row_cexrc, 
##     cexCol = col_cexrc, key = T, keysize = key_size, density.info = "none", 
##     main = main.txt, xlab = footnote)
## 
## $carpet
##            inf_down   inf_up
## manip_up    0.00000 2.075416
## manip_down  1.20292 0.000000
## 
## $rowDendrogram
## 'dendrogram' with 2 branches and 2 members total, at height 1.414214 
## 
## $colDendrogram
## 'dendrogram' with 2 branches and 2 members total, at height 1.414214 
## 
## $breaks
##  [1] 0.0000000 0.2306017 0.4612035 0.6918052 0.9224070 1.1530087 1.3836105
##  [8] 1.6142122 1.8448140 2.0754157
## 
## $col
## [1] "#F7FCF5" "#E5F5E0" "#C7E9C0" "#A1D99B" "#74C476" "#41AB5D" "#238B45"
## [8] "#006D2C" "#00441B"
## 
## $colorTable
##         low      high   color
## 1 0.0000000 0.2306017 #F7FCF5
## 2 0.2306017 0.4612035 #E5F5E0
## 3 0.4612035 0.6918052 #C7E9C0
## 4 0.6918052 0.9224070 #A1D99B
## 5 0.9224070 1.1530087 #74C476
## 6 1.1530087 1.3836105 #41AB5D
## 7 1.3836105 1.6142122 #238B45
## 8 1.6142122 1.8448140 #006D2C
## 9 1.8448140 2.0754157 #00441B
## 
## $layout
## $layout$lmat
##      [,1] [,2]
## [1,]    4    3
## [2,]    2    1
## 
## $layout$lhei
## [1] 1.485097 4.000000
## 
## $layout$lwid
## [1] 1.485097 4.000000

6. Daily exp (DEGs at inf)

Let’s plot the daily expression of the DEGs (ocflo controls v. during infection)

# inf.down %>% 
#   # stacked.zplot_tc6(cond = "inf")
#   stacked.zplot_tc6(cond = "ophio")

Clustering of DEGs

  • perform hierarchical clustering of DE genes into four clusters;
  • plot time-course heatmaps for the clustered 24h-rhythmic geneset
  • Identify the day-peaking and night-peaking clusters visually.
## Get the ocflo infection timecourse data (zscores)
zscore.inf.dat <- inf.db %>% tbl(., paste0(sampleName[[2]], "_zscores")) %>% collect()
colnames(zscore.inf.dat)[-1] <- paste0("ZT",meta[meta$cond==sampleName[[2]],] %>% pull(sample))

## Specify parameters
n_clusters <- 4
which.degs <- list(inf.up, inf.down, manip.up, manip.down)
names(which.degs) <- c("inf.up", "inf.down", "manip.up", "manip.down")
  
for (i in 1:length(which.degs)) {
    
    ## Which genes to look at?
    
    # which.genes <- c(inf.up,inf.down)
    which.genes <- which.degs[[i]]
    
    ### Make the dataframe for plotting
    zscore.deg.dat <-
      zscore.dat[[2]] %>% 
      filter(gene_name %in% which.genes) %>% 
      # add data from infection
      left_join(zscore.inf.dat, by="gene_name") %>% 
      # drop any genes without expression values (NA)
      na.omit() %>% 
      as.data.frame() %>% 
      # set genes as rownames
      column_to_rownames("gene_name")
    
    # Set genes as rownames and convert it into a matrix
    # rownames(zscore.rhy.homology.dat) = zscore.rhy.homology.dat$gene_name
    zscore.deg.dat <- as.matrix(zscore.deg.dat)
    
    # Hierarchical clustering of the genesets
    my_hclust_gene <- hclust(dist(zscore.deg.dat), method = "complete")
    
    # Make annotations for the heatmaps
    my_clusters <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # k= number of clusters
    my_gene_col <- data.frame(cluster = my_clusters)
    
    
    # I’ll add some column annotations and create the heatmap.
    # Annotations for:
    # 1. Is the sample collected during the light or dark phase? 
    my_sample_col <- data.frame(phase = rep(rep(c("light", "dark", "light"),c(5,6,1)),2),
                                conds = rep(c("ocflo_controls", "ocflo_infection"), each=12))
    row.names(my_sample_col) <- colnames(zscore.deg.dat)
    
    # Manual color palette
    my_colour = list(
      phase = c(light = "#F2E18D", dark = "#010440"),
      conds = c(ocflo_controls = col.scheme[[2]], ocflo_infection = col.scheme[[4]]),
      cluster = viridis::cividis(100)[c(seq(10,80,by=round(80/(n_clusters), 0)))])
    
    # Color scale
    my.breaks = seq(min(zscore.deg.dat), max(zscore.deg.dat), by=0.1)
    # my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
    
    # Let's plot!
    pheatmap(zscore.deg.dat, show_rownames = F, show_colnames = F,
               annotation_row = my_gene_col, 
               annotation_col = my_sample_col,
               cutree_rows = n_clusters, # OG was 4
               # cutree_cols = 2,
               gaps_col = c(12,24),
               annotation_colors = my_colour,
               border_color=FALSE,
               cluster_cols = F,
               breaks = my.breaks,
               ## color scheme borrowed from: 
               color = inferno(length(my.breaks) - 1),
               treeheight_row = 0,
               # treeheight_col = 0,
               # remove the color scale or not
               main = paste0("DEGs - ",names(which.degs)[[i]], "\n (n=",nrow(zscore.deg.dat), " genes)"),
               ## annotation legend
               annotation_legend = T,
               ## Color scale
               legend = T) %>% 
      print() 
    
    
    for (j in 1:n_clusters){
    
    writeLines(paste0("Which DEGs: ", names(which.degs)[[i]], "\n", "Cluster: ", j))
    
    # Summary
    genes <- my_gene_col %>% rownames_to_column("g") %>% filter(cluster==as.character(j)) %>% pull(g)
    writeLines(paste0("n(genes) = ", length(genes),"\n"))
    
    # define the background geneset for enrichment analysis
    bg.genes <- all.DEGs %>% pull(gene_name)
    
    # Enrichment
    overrepresented.terms <-
      genes %>% 
        check_enrichment(.,
                      function.dir = path_to_repo,
                      what = "pfams",
                      org = sampleName[[1]],
                      bg = bg.genes,
                      filter = T,
                      plot = F)
    writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
    
    # Stacked zplot
    stacked.plot1 <- genes %>% stacked.zplot_tc6(cond = sampleName[[1]]) %>% pluck(1)
    stacked.plot2 <- genes %>% stacked.zplot_tc6(cond = sampleName[[2]]) %>% pluck(1)
    ggpubr::ggarrange(plotlist=list(stacked.plot1, stacked.plot2),
                  nrow = 1, ncol = 2,
                  widths = c(1,1), labels = NA) %>% 
      print()
    
  }
 
}

## Which DEGs: inf.up
## Cluster: 1
## n(genes) = 230
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 4

## Which DEGs: inf.up
## Cluster: 2
## n(genes) = 44
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## 
## n(overrepresented terms) =

## Which DEGs: inf.up
## Cluster: 3
## n(genes) = 34
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## 
## n(overrepresented terms) =

## Which DEGs: inf.up
## Cluster: 4
## n(genes) = 10
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## 
## n(overrepresented terms) =

## Which DEGs: inf.down
## Cluster: 1
## n(genes) = 79
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## 
## n(overrepresented terms) =

## Which DEGs: inf.down
## Cluster: 2
## n(genes) = 169
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 3

## Which DEGs: inf.down
## Cluster: 3
## n(genes) = 103
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## 
## n(overrepresented terms) =

## Which DEGs: inf.down
## Cluster: 4
## n(genes) = 44
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## 
## n(overrepresented terms) =

## Which DEGs: manip.up
## Cluster: 1
## n(genes) = 275
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 3

## Which DEGs: manip.up
## Cluster: 2
## n(genes) = 432
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 9

## Which DEGs: manip.up
## Cluster: 3
## n(genes) = 788
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 8

## Which DEGs: manip.up
## Cluster: 4
## n(genes) = 368
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 4

## Which DEGs: manip.down
## Cluster: 1
## n(genes) = 244
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 2

## Which DEGs: manip.down
## Cluster: 2
## n(genes) = 420
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 3

## Which DEGs: manip.down
## Cluster: 3
## n(genes) = 371
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 2

## Which DEGs: manip.down
## Cluster: 4
## n(genes) = 588
## 
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## 
## n(overrepresented terms) = 5